## [1] "December 27, 2025"
Badania są w większości skoncentrowane na elektrodach z elektrolitem wykonanym z wodorotlenku potasu (KOH) lub kwasu siarkowego (H2SO4). Najczęściej występujące materiały posiadają w badaniach lepsze rezultaty, choć znaczna większość przeprowadzonych badań raportuje względnie niską pojemność badanych elektrod (< 500 F/g).
Co wyróżnia te badania, w których odnotowano znacznie wyższą pojemność nie jest jasne, gdyż zbiór danych ma poważne braki - u około połowy atrybutów zbioru ponad połowę wartości zajmują wartości puste. U atrybutów z lepszym pokryciem choć jasne jest jakie są przedziały wartości dla lepszych rezultatów pojemności (np. okno stabilności około 0-0.5 V), istnieją w zbiorze także dane z podobnymi parametrami, ale znacznie gorszym wynikiem pojemności.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.5.2
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(caret)
## Warning: package 'caret' was built under R version 4.5.2
## Loading required package: lattice
library(shapper)
library(DALEX)
## Warning: package 'DALEX' was built under R version 4.5.2
## Welcome to DALEX (version: 2.5.3).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
##
## Attaching package: 'DALEX'
## The following object is masked from 'package:dplyr':
##
## explain
data <- read.csv("data.csv")
nrow(data)
## [1] 925
summary(data)
## Ref. Limits.of.Potential.Window..V.
## Length:925 Length:925
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Lower.Limit.of.Potential.Window..V. Upper.Limit.of.Potential.Window..V.
## Min. :-1.1000 Min. :-0.2000
## 1st Qu.:-0.3000 1st Qu.: 0.4000
## Median : 0.0000 Median : 0.6000
## Mean :-0.2343 Mean : 0.6301
## 3rd Qu.: 0.0000 3rd Qu.: 0.8000
## Max. : 0.2000 Max. : 3.5000
## NA's :4 NA's :4
## Potential.Window..V. Current.Density..A.g. Capacitance..F.g.
## Min. :0.4000 Min. : 0.050 Min. : 1.4
## 1st Qu.:0.6000 1st Qu.: 1.000 1st Qu.: 148.6
## Median :0.8250 Median : 2.000 Median : 260.2
## Mean :0.8634 Mean : 5.857 Mean : 415.5
## 3rd Qu.:1.0000 3rd Qu.: 5.000 3rd Qu.: 509.9
## Max. :3.5000 Max. :200.000 Max. :3344.1
## NA's :5 NA's :16 NA's :17
## Specific.Surface.Area..m.2.g. Charge.Transfer.Resistance..Rct...ohm.
## Min. : 8.896 Min. : 0.080
## 1st Qu.: 57.000 1st Qu.: 0.670
## Median : 159.970 Median : 1.540
## Mean : 417.438 Mean : 3.048
## 3rd Qu.: 546.000 3rd Qu.: 3.240
## Max. :2400.000 Max. :24.200
## NA's :572 NA's :786
## Equivalent.Series.Resistance..Rs...ohm. Electrode.Configuration
## Min. : 0.200 Length:925
## 1st Qu.: 0.350 Class :character
## Median : 0.580 Mode :character
## Mean : 1.602
## 3rd Qu.: 2.000
## Max. :17.500
## NA's :772
## Pore.Size..nm. Pore.Volume..cm.3.g. Ratio.of.ID.IG N.at.
## Min. : 0.530 Min. :0.0200 Min. :0.120 Min. : 0.00
## 1st Qu.: 3.045 1st Qu.:0.1680 1st Qu.:0.940 1st Qu.: 0.00
## Median : 4.337 Median :0.2170 Median :1.050 Median : 0.00
## Mean : 8.618 Mean :0.4857 Mean :1.121 Mean : 2.50
## 3rd Qu.:13.625 3rd Qu.:0.5075 3rd Qu.:1.170 3rd Qu.: 3.20
## Max. :44.131 Max. :2.3500 Max. :2.900 Max. :23.82
## NA's :769 NA's :729 NA's :596 NA's :690
## C.at. O.at. Electrolyte.Chemical.Formula
## Min. : 1.40 Min. : 1.900 Length:925
## 1st Qu.:37.32 1st Qu.: 8.883 Class :character
## Median :81.00 Median :13.700 Mode :character
## Mean :66.52 Mean :19.176
## 3rd Qu.:85.58 3rd Qu.:27.098
## Max. :98.10 Max. :54.280
## NA's :699 NA's :703
## Electrolyte.Ionic.Conductivity Electrolyte.Concentration..M.
## Min. :1.000 Min. :0.100
## 1st Qu.:6.000 1st Qu.:1.000
## Median :6.000 Median :1.000
## Mean :5.806 Mean :2.576
## 3rd Qu.:7.000 3rd Qu.:6.000
## Max. :8.000 Max. :6.000
## NA's :99 NA's :62
## Cell.Configuration..three.two.electrode.system.
## Length:925
## Class :character
## Mode :character
##
##
##
##
Zasady czyszczenia danych:
keep_cols <- colSums(is.na(data)) < nrow(data) / 2
keep_cols["Limits.of.Potential.Window..V."] <- FALSE
medians <- sapply(data[, sapply(data, is.numeric)], median, na.rm = TRUE)
median_cols <- intersect(colnames(data)[keep_cols], names(medians))
data <- select(data, colnames(data)[keep_cols])
keep_rows <- rowSums(is.na(data)) < sum(keep_cols) / 2
data <- filter(data, keep_rows)
for (col in median_cols) {
data[is.na(data[, col]), col] <- medians[col]
}
str_strip <- function(x) {
x_sub <- x
while (substring(x_sub, 1, 1) == " ") {
x_sub <- substring(x_sub, 2)
}
while (substring(x_sub, nchar(x_sub)) == " ") {
x_sub <- substring(x_sub, 1, nchar(x_sub) - 1)
}
return(x_sub)
}
string_cols <- colnames(data)[!sapply(data, is.numeric)]
for (col in string_cols) {
data[, col] <- sapply(data[, col], str_strip)
tt <- table(data[, col])
max_val <- names(tt[tt == max(tt)])[1]
data[data[, col] == "", col] <- max_val
}
nrow(data)
## [1] 925
summary(data)
## Ref. Lower.Limit.of.Potential.Window..V.
## Length:925 Min. :-1.1000
## Class :character 1st Qu.:-0.3000
## Mode :character Median : 0.0000
## Mean :-0.2333
## 3rd Qu.: 0.0000
## Max. : 0.2000
## Upper.Limit.of.Potential.Window..V. Potential.Window..V. Current.Density..A.g.
## Min. :-0.2000 Min. :0.4000 Min. : 0.05
## 1st Qu.: 0.4000 1st Qu.:0.6000 1st Qu.: 1.00
## Median : 0.6000 Median :0.8250 Median : 2.00
## Mean : 0.6299 Mean :0.8632 Mean : 5.79
## 3rd Qu.: 0.8000 3rd Qu.:1.0000 3rd Qu.: 5.00
## Max. : 3.5000 Max. :3.5000 Max. :200.00
## Capacitance..F.g. Electrode.Configuration Electrolyte.Chemical.Formula
## Min. : 1.4 Length:925 Length:925
## 1st Qu.: 150.8 Class :character Class :character
## Median : 260.2 Mode :character Mode :character
## Mean : 412.6
## 3rd Qu.: 493.6
## Max. :3344.1
## Electrolyte.Ionic.Conductivity Electrolyte.Concentration..M.
## Min. :1.000 Min. :0.10
## 1st Qu.:6.000 1st Qu.:1.00
## Median :6.000 Median :1.00
## Mean :5.827 Mean :2.47
## 3rd Qu.:7.000 3rd Qu.:6.00
## Max. :8.000 Max. :6.00
## Cell.Configuration..three.two.electrode.system.
## Length:925
## Class :character
## Mode :character
##
##
##
for (col in colnames(data)) {
if (is.numeric(data[, col])) {
hist(data[, col], main = col,
xlab = "Wartość", ylab = "Wystąpienia")
} else if (!(col %in% c("Ref.", "Electrode.Configuration"))) {
tt <- table(data[, col])
val_cnt <- data.frame(vals = names(tt),
counts = as.numeric(tt))
print(ggplot(val_cnt, aes(x = vals, y = counts)) +
geom_bar(stat = "identity") +
labs(x = "Wartość", y = "Wystąpienia", title = col) +
theme(axis.text.x = element_text(angle = 30, vjust = 0.5)))
}
}
W zestwieniu pominięto atrybut konfiguracji elektrody, gdyż jest to atrybut tekstowy z aż 353 różnymi wartościami. Nie jest możliwe zaprezentowanie rozkładu wartości tego atrybutu w raporcie w czytelny sposób.
elec_conf_vallist <- unique(data$Electrode.Configuration)
elec_chem_vallist <- unique(data$Electrolyte.Chemical.Formula)
cell_conf_vallist <- unique(data$Cell.Configuration..three.two.electrode.system.)
data_w_enums <- data %>%
mutate(Electrode.Configuration = match(Electrode.Configuration, elec_conf_vallist)) %>%
mutate(Electrolyte.Chemical.Formula = match(Electrolyte.Chemical.Formula, elec_chem_vallist)) %>%
mutate(Cell.Configuration..three.two.electrode.system. = match(Cell.Configuration..three.two.electrode.system., cell_conf_vallist))
ggcorrplot(cor(select(data_w_enums, where(is.numeric))))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
## Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p <- ggplot(data) +
geom_linerange(aes(x = Capacitance..F.g.,
ymin = Lower.Limit.of.Potential.Window..V.,
ymax = Upper.Limit.of.Potential.Window..V.,
color = Cell.Configuration..three.two.electrode.system.),
alpha = 0.15) +
labs(y = "Potential.Window..V.", color = "Cell Configuration",
title = "Porównanie konfiguracji elektrod") +
coord_flip()
ggplotly(p)
n_list <- names(head(sort(table(data$Electrolyte.Chemical.Formula), decreasing = TRUE), 12))
data_filtered <- data %>%
filter(Electrolyte.Chemical.Formula %in% n_list)
p <- ggplot(data_filtered) +
geom_linerange(aes(x = Capacitance..F.g.,
ymin = Lower.Limit.of.Potential.Window..V.,
ymax = Upper.Limit.of.Potential.Window..V.,
color = Electrolyte.Chemical.Formula),
alpha = 0.2) +
labs(y = "Potential.Window..V.", color = "Chemical Formula",
title = "Porównanie najczęstszych składów chemicznych elektrolitu") +
coord_flip()
ggplotly(p)
p <- ggplot(data_filtered) +
geom_point(aes(x = Capacitance..F.g.,
y = Current.Density..A.g.,
color = Electrolyte.Chemical.Formula))
ggplotly(p)
p <- ggplot(data_filtered) +
geom_point(aes(x = Capacitance..F.g.,
y = Electrolyte.Ionic.Conductivity,
color = Electrolyte.Chemical.Formula))
ggplotly(p)
p <- ggplot(data_filtered) +
geom_point(aes(x = Capacitance..F.g.,
y = Electrolyte.Concentration..M.,
color = Electrolyte.Chemical.Formula))
ggplotly(p)
Najlepszą pojemność osiągają materiały których okno stabilności mieści się około granic 0-0.5 V. Konfiguracja trójelektrodowa okazuje znacznie lepszą pojemność niż dwuelektrodowa kosztem węższego okna stabilności. Najwyższe pojemności ze zbioru osiągają materiały z eletrolitem wykonanym z wodorotlenku potasu (KOH). Gęstość natężenia jest w większości danych niska (< 10 A/g), choć z jej wzrostem spada maksymalna zanotowana wartość pojemności.
# zmiana kolumn tekstowych na enumy
elec_conf_vallist <- unique(data$Electrode.Configuration)
elec_chem_vallist <- unique(data$Electrolyte.Chemical.Formula)
cell_conf_vallist <- unique(data$Cell.Configuration..three.two.electrode.system.)
ml_data <- data %>%
mutate(Electrode.Configuration = match(Electrode.Configuration, elec_conf_vallist)) %>%
mutate(Electrolyte.Chemical.Formula = match(Electrolyte.Chemical.Formula, elec_chem_vallist)) %>%
mutate(Cell.Configuration..three.two.electrode.system. = match(Cell.Configuration..three.two.electrode.system., cell_conf_vallist)) %>%
select(-Ref.)
# podział zbioru danych
set.seed(9001)
is_training <- createDataPartition(
y = ml_data$Capacitance..F.g.,
p = .75,
list = FALSE
)
training_data <- ml_data[is_training, ]
testing_data <- ml_data[-is_training, ]
# uczenie
ctrl <- trainControl(
method = "cv",
# number = 2,
# repeats = 10
)
set.seed(1337)
fit <- train(
Capacitance..F.g. ~ .,
data = training_data,
method = "lm",
trControl = ctrl
)
# predykcja
predictions <- predict(fit, newdata = select(testing_data, -Capacitance..F.g.))
# shap
explainer <- explain(fit,
data = training_data %>% select(-Capacitance..F.g.),
y = as.numeric(training_data$Capacitance..F.g.))
## Preparation of a new explainer is initiated
## -> model label : train.formula ( default )
## -> data : 696 rows 9 cols
## -> target variable : 696 values
## -> predict function : yhat.train will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package caret , ver. 7.0.1 , task regression ( default )
## -> predicted values : numerical, min = -509.2363 , mean = 405.4463 , max = 743.2534
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -529.2909 , mean = 1.41106e-12 , max = 2414.24
## A new explainer has been created!
shapped <- shap(explainer,
new_observation = testing_data[1, ] %>%
select(-Capacitance..F.g.))
plot(shapped)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the shapper package.
## Please report the issue at <https://github.com/ModelOriented/shapper/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.